rm(list = ls())
set.seed(1111)
library(MatchIt)
library(dplyr)
library(ggplot2)
library(haven)
library(stargazer)
library(tableone)
library(Hmisc)
library(cem)
library(Matching)
library(car)
library(Zelig)
library(RColorBrewer)
library(arm)
setwd("~/Dropbox/AttorneyMainData")
data <- read_dta("Attorney.dta")
data$hysLaw <- car::recode(data$SchoolLawyerArgue,"'Harvard'=1;'Yale'=1;'Stanford'=1;'Columbia'=1;'Chicago'=1;else=0")
data$demPres <- car::recode(data$SGOrals,"'McCree'=1;'Lee'=0;'(Acting) Fried'=0;'Fried'=0;'(Acting) Bryson'=0;'Starr'=0;'Days'=1;'(Acting) Dellinger'=1;'Waxman'=1;'(Acting) Underwood'=0; 'Olson'=0;'(Acting) Clement'=0;'Clement'=0;'Garre'=0;'(Acting) Kneedler'=1;'Kagan'=1;'(Acting) Katyal'=1;'Verrilli'=1;'(Acting) Gershengorn'=1;'(Acting) Francisco'=1;'(Acting) Wall'=1; 'Francisco'=1")
##Bryson was acting for both Bush 1 and Reagan
data$demPres[data$SGOrals=="(Acting) Bryson" & data$term=="1992"] <- 1
data$SGOralsCollapseF <- as.factor(data$SGOralsCollapse)
data$justiceNameF <- as.factor(data$justiceName)
data$ActingSGOrals <- as.numeric(factor(data$ActingSGOrals, ordered=F))-1
data$TopLawSchool <- as.numeric(factor(data$TopLawSchool, ordered=F))-1
data$DCFirm <- as.numeric(factor(data$DCFirm, ordered=F))-1
data$ClerkDummy <- as.numeric(factor(data$ClerkDummy, ordered=F))-1
data$OppPetOrResp <- as.numeric(factor(data$OppPetOrResp, ordered=F))-1
data$CriminalDummy <- as.numeric(factor(data$CriminalDummy, ordered=F))-1
data$CivLibDummy <- as.numeric(factor(data$CivLibDummy, ordered=F))-1
data$EconDummy <- as.numeric(factor(data$EconDummy, ordered=F))-1
data$Acela <- as.numeric(factor(data$Acela, ordered=F))-1
data$LawyerArgueOSG <- as.numeric(factor(data$LawyerArgueOSG, ordered=F))-1
data$GenderLawyerArgue <- as.numeric(factor(data$GenderLawyerArgue, ordered=F))-1
data$RaceLawyerArgue <- as.numeric(factor(data$RaceLawyerArgue, ordered=F))-1
data$IdeologyOppCounsel <- as.numeric(factor(data$IdeologyOppCounsel, ordered=F))-1
ctLevel <- subset(data, ct_level==1)
summary(data)
dim(data)


##Create Datasets
#Justice-level
xvars<-c("SGOralsCollapseF","ActingSGOrals","hysLaw","DCFirm","ClerkDummy","Rehnquist","Roberts","IdeologyOppCounsel","OppPetOrResp","demPres","justiceNameF")
data_nomiss <- data %>%  # MatchIt does not allow missing values
  dplyr::select(PriorOral, JVote, one_of(xvars)) %>%
  na.omit()
data_nomiss <- data.frame(data_nomiss)
table(data_nomiss$PriorOral)
pretab1<-CreateTableOne(vars=xvars, strata ="PriorOral", 
                        data=data_nomiss, test = FALSE)
print(pretab1, smd = TRUE)

##CEM
match_justice_cem <- matchit(PriorOral ~ SGOralsCollapseF+ActingSGOrals+hysLaw+DCFirm+ClerkDummy+IdeologyOppCounsel+demPres+Rehnquist+Roberts+OppPetOrResp+justiceNameF,
                             method = "cem", data = data_nomiss)
summary(match_justice_cem)
dta_justice_cem <- match.data(match_justice_cem)

##Replicate Main Results
data_justice_cem <- match.data(match_justice_cem)

full_justice <- glm(JVote~PriorOral, family="binomial", data = data); summary(full_justice)
cem_justice <- glm(JVote~PriorOral, family="binomial", data = match.data(match_justice_cem), weights=match.data(match_justice_cem)$weights); summary(cem_justice)
full_justice_m <- glm(JVote~PriorOral+hysLaw+DCFirm+ClerkDummy+Rehnquist+Roberts+OppPetOrResp+IdeologyOppCounsel+demPres+justiceNameF, family="binomial", data = data); summary(full_justice_m)
cem_justice_m <- glm(JVote~PriorOral+hysLaw+DCFirm+ClerkDummy+Rehnquist+Roberts+OppPetOrResp+IdeologyOppCounsel+demPres+justiceNameF, family="binomial", data = match.data(match_justice_cem), weights=match.data(match_justice_cem)$weights); summary(cem_justice_m)

##Estimate Clustering
library(miceadds)
full_justice_cluster <- glm.cluster(data=data, formula=JVote~PriorOral,
                              cluster="justiceNameF", family="binomial")
round(summary(full_justice_cluster),2)
summary(full_justice_cluster)
full_justice_m_cluster <- glm.cluster(data=data, formula=JVote~PriorOral+hysLaw+DCFirm+ClerkDummy+Rehnquist+Roberts+OppPetOrResp+IdeologyOppCounsel+demPres+justiceNameF,
                                    cluster="justiceNameF", family="binomial")
round(summary(full_justice_m_cluster),2)
cem_justice_cluster <- glm.cluster(data=match.data(match_justice_cem), weights=match.data(match_justice_cem)$weights, formula=JVote~PriorOral,
                                    cluster="justiceNameF", family="binomial")
round(summary(cem_justice_cluster),2)
cem_justice_m_cluster <- glm.cluster(data=match.data(match_justice_cem), weights=match.data(match_justice_cem)$weights, formula=JVote~PriorOral+hysLaw+DCFirm+ClerkDummy+Rehnquist+Roberts+OppPetOrResp+IdeologyOppCounsel+demPres+justiceNameF,
                                   cluster="justiceNameF", family="binomial")
round(summary(cem_justice_m_cluster),2)


##simulate
#Estimate unclustered models to help the simulation
full_justice <- glm(JVote~PriorOral, family="binomial", data = data); summary(full_justice)
cem_justice <- glm(JVote~PriorOral, family="binomial", data = match.data(match_justice_cem), weights=match.data(match_justice_cem)$weights); summary(cem_justice)
full_justice_m <- glm(JVote~PriorOral+hysLaw+DCFirm+ClerkDummy+Rehnquist+Roberts+OppPetOrResp+IdeologyOppCounsel+demPres+justiceNameF, family="binomial", data = data); summary(full_justice_m)
cem_justice_m <- glm(JVote~PriorOral+hysLaw+DCFirm+ClerkDummy+Rehnquist+Roberts+OppPetOrResp+IdeologyOppCounsel+demPres+justiceNameF, family="binomial", data = match.data(match_justice_cem), weights=match.data(match_justice_cem)$weights); summary(cem_justice_m)

set.seed(1111)
dd <- data
glmmodel <- full_justice
clustermodel <- full_justice_cluster
object.class <- class(glmmodel)[[1]]
summ <- summary(glmmodel, correlation = TRUE, dispersion = glmmodel$dispersion)
coef <- summ$coef[, 1:2, drop = FALSE]
coef[,2] <- sqrt(diag(clustermodel$vcov))
dimnames(coef)[[2]] <- c("coef.est", "coef.sd")
beta.hat <- coef[, 1, drop = FALSE]
sd.beta <- coef[, 2, drop = FALSE]
corr.beta <- summ$corr
n <- summ$df[1] + summ$df[2]
k <- summ$df[1]
V.beta <- corr.beta * array(sd.beta, c(k, k)) * t(array(sd.beta, 
                                                        c(k, k)))
beta <- MASS::mvrnorm(100, beta.hat, V.beta)
beta2 <- array(0, c(100, length(coefficients(glmmodel))))
dimnames(beta2) <- list(NULL, names(coefficients(glmmodel)))
beta2[, dimnames(beta2)[[2]] %in% dimnames(beta)[[2]]] <- beta
sigma <- rep(sqrt(summ$dispersion), 100)
aa <- new("sim", coef = beta2, sigma = sigma)
acoef <- coef(aa)
head(acoef)
treated <- invlogit(acoef[,1] + acoef[,2]*1)
untreated <- invlogit(acoef[,1] + acoef[,2]*0)
j_n <- treated-untreated

dd <- data
glmmodel <- full_justice_m
clustermodel <- full_justice_m_cluster
object.class <- class(glmmodel)[[1]]
summ <- summary(glmmodel, correlation = TRUE, dispersion = glmmodel$dispersion)
coef <- summ$coef[, 1:2, drop = FALSE]
coef[,2] <- sqrt(diag(clustermodel$vcov))
dimnames(coef)[[2]] <- c("coef.est", "coef.sd")
beta.hat <- coef[, 1, drop = FALSE]
sd.beta <- coef[, 2, drop = FALSE]
corr.beta <- summ$corr
n <- summ$df[1] + summ$df[2]
k <- summ$df[1]
V.beta <- corr.beta * array(sd.beta, c(k, k)) * t(array(sd.beta, 
                                                        c(k, k)))
beta <- MASS::mvrnorm(100, beta.hat, V.beta)
beta2 <- array(0, c(100, length(coefficients(glmmodel))))
dimnames(beta2) <- list(NULL, names(coefficients(glmmodel)))
beta2[, dimnames(beta2)[[2]] %in% dimnames(beta)[[2]]] <- beta
sigma <- rep(sqrt(summ$dispersion), 100)
aa <- new("sim", coef = beta2, sigma = sigma)
acoef <- coef(aa)
head(acoef)
treated <- invlogit(acoef[,"(Intercept)"] + acoef[,"PriorOral"]*1 + acoef[,"hysLaw"]*median(dd$hysLaw,na.omit=T) + acoef[,"DCFirm"]*median(dd$DCFirm,na.omit=T) + acoef[,"ClerkDummy"]*median(dd$ClerkDummy,na.omit=T) + acoef[,"Rehnquist"]*1 + acoef[,"Roberts"]*0 + acoef[,"OppPetOrResp"]*median(dd$OppPetOrResp,na.omit=T) + acoef[,"IdeologyOppCounsel"]*1  + acoef[,"demPres"]*0 + acoef[,"justiceNameFJPStevens"]*1)
untreated <- invlogit(acoef[,"(Intercept)"] + acoef[,"PriorOral"]*0 + acoef[,"hysLaw"]*median(dd$hysLaw,na.omit=T) + acoef[,"DCFirm"]*median(dd$DCFirm,na.omit=T) + acoef[,"ClerkDummy"]*median(dd$ClerkDummy,na.omit=T) + acoef[,"Rehnquist"]*1 + acoef[,"Roberts"]*0 + acoef[,"OppPetOrResp"]*median(dd$OppPetOrResp,na.omit=T) + acoef[,"IdeologyOppCounsel"]*1  + acoef[,"demPres"]*0 + acoef[,"justiceNameFJPStevens"]*1)
j_m <- treated-untreated

dd <- match.data(match_justice_cem)
glmmodel <- cem_justice
clustermodel <- cem_justice_cluster
object.class <- class(glmmodel)[[1]]
summ <- summary(glmmodel, correlation = TRUE, dispersion = glmmodel$dispersion)
coef <- summ$coef[, 1:2, drop = FALSE]
coef[,2] <- sqrt(diag(clustermodel$vcov))
dimnames(coef)[[2]] <- c("coef.est", "coef.sd")
beta.hat <- coef[, 1, drop = FALSE]
sd.beta <- coef[, 2, drop = FALSE]
corr.beta <- summ$corr
n <- summ$df[1] + summ$df[2]
k <- summ$df[1]
V.beta <- corr.beta * array(sd.beta, c(k, k)) * t(array(sd.beta, 
                                                        c(k, k)))
beta <- MASS::mvrnorm(100, beta.hat, V.beta)
beta2 <- array(0, c(100, length(coefficients(glmmodel))))
dimnames(beta2) <- list(NULL, names(coefficients(glmmodel)))
beta2[, dimnames(beta2)[[2]] %in% dimnames(beta)[[2]]] <- beta
sigma <- rep(sqrt(summ$dispersion), 100)
aa <- new("sim", coef = beta2, sigma = sigma)
acoef <- coef(aa)
head(acoef)
treated <- invlogit(acoef[,1] + acoef[,2]*1)
untreated <- invlogit(acoef[,1] + acoef[,2]*0)
j_n_match <- treated-untreated

dd <- match.data(match_justice_cem)
glmmodel <- cem_justice_m
clustermodel <- cem_justice_m_cluster
object.class <- class(glmmodel)[[1]]
summ <- summary(glmmodel, correlation = TRUE, dispersion = glmmodel$dispersion)
coef <- summ$coef[, 1:2, drop = FALSE]
coef[,2] <- sqrt(diag(clustermodel$vcov))
dimnames(coef)[[2]] <- c("coef.est", "coef.sd")
beta.hat <- coef[, 1, drop = FALSE]
sd.beta <- coef[, 2, drop = FALSE]
corr.beta <- summ$corr
n <- summ$df[1] + summ$df[2]
k <- summ$df[1]
V.beta <- corr.beta * array(sd.beta, c(k, k)) * t(array(sd.beta, 
                                                        c(k, k)))
beta <- MASS::mvrnorm(100, beta.hat, V.beta)
beta2 <- array(0, c(100, length(coefficients(glmmodel))))
dimnames(beta2) <- list(NULL, names(coefficients(glmmodel)))
beta2[, dimnames(beta2)[[2]] %in% dimnames(beta)[[2]]] <- beta
sigma <- rep(sqrt(summ$dispersion), 100)
aa <- new("sim", coef = beta2, sigma = sigma)
acoef <- coef(aa)
head(acoef)
treated <- invlogit(acoef[,"(Intercept)"] + acoef[,"PriorOral"]*1 + acoef[,"hysLaw"]*median(dd$hysLaw,na.omit=T) + acoef[,"DCFirm"]*median(dd$DCFirm,na.omit=T) + acoef[,"ClerkDummy"]*median(dd$ClerkDummy,na.omit=T) + acoef[,"Rehnquist"]*1 + acoef[,"Roberts"]*0 + acoef[,"OppPetOrResp"]*median(dd$OppPetOrResp,na.omit=T) + acoef[,"IdeologyOppCounsel"]*1  + acoef[,"demPres"]*0  + acoef[,"justiceNameFJPStevens"]*1)
untreated <- invlogit(acoef[,"(Intercept)"] + acoef[,"PriorOral"]*0 + acoef[,"hysLaw"]*median(dd$hysLaw,na.omit=T) + acoef[,"DCFirm"]*median(dd$DCFirm,na.omit=T) + acoef[,"ClerkDummy"]*median(dd$ClerkDummy,na.omit=T) + acoef[,"Rehnquist"]*1 + acoef[,"Roberts"]*0 + acoef[,"OppPetOrResp"]*median(dd$OppPetOrResp,na.omit=T) + acoef[,"IdeologyOppCounsel"]*1  + acoef[,"demPres"]*0  + acoef[,"justiceNameFJPStevens"]*1)
j_m_match <- treated-untreated

###SI, Figure A10
pdf("MainResultsCluster.pdf",width=7,height=6.6,paper='special') 
par(mar=c(5.1,2, 4.1, 2.1),mfrow=c(1,1),oma=c(0,6,0,0))
plot(NA,xlim=c(-.1,.3),ylim=c(.5,2.5),ylab="",xlab="ATT", main="",yaxt="n")
axis(2, at=c(2,1),labels=c("Naive","Multivariate"), las=2)
points(y=2.1,x=mean(j_n), pch=19,cex=1.25,col="gray70")
points(y=1.1,x=mean(j_m), pch=19,cex=1.25,col="gray70")
points(y=1.9,x=mean(j_n_match), pch=18,cex=1.5)
points(y=.9,x=mean(j_m_match), pch=18,cex=1.5)
segments(y0=2.1,y1=2.1,x0=quantile(j_n,.025),x1=quantile(j_n,.975),lwd=2,col="gray70")
segments(y0=1.1,y1=1.1,x0=quantile(j_m,.025),x1=quantile(j_m,.975),lwd=2,col="gray70")
segments(y0=1.9,y1=1.9,x0=quantile(j_n_match,.025),x1=quantile(j_n_match,.975),lwd=2)
segments(y0=.9,y1=.9,x0=quantile(j_m_match,.025),x1=quantile(j_m_match,.975),lwd=2)
abline(v=0,lwd=3,col="gray60")
legend("bottomright",c("Full Dataset","Matched Dataset"),       col=c("gray70","black"),pch=c(19,18),pt.cex=c(1,1.3))
dev.off()

